This geospatial model uses routinely collected malaria case data, population data and remotely sensed data, such as open and vegetated water bodies, to estimate population living around open water bodies, and ultimately quantify the association between proximity to larval habitat and malaria risk in health facility catchment areas in Kasungu. The buffer layer around waterbodies are created and then combined with population data in raster format to estimate the total population that is within the buffer. The data used spans from 2017 to 2020 and was derived from digitized DHIS2 malaria records, aggregated population geospatial layer and TropWet tool in Google Earth Engine.

Load packages

Loading the R packages that will be used to read in, view, transform and model the malaria cases and spatial datasets.

library(spatialEco)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(plotly)
library(lubridate)
library(knitr)
library(raster)
library(rgdal)
library(rgeos)
library(sf)
library(sp)
library(tmap)
library(spdep)
library(maptools)
library(gridExtra)
library(grid)
library(exactextractr)
library(DataExplorer)
library(mapview)
`%>%` <- magrittr::`%>%`

Tell R where the data is

here::here()
## [1] "C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021"

Load datasets

The total dry season malaria cases recorded at health-care facilities in Kasungu from 2017 to 2019 are contained in the KasunguData.csv sourced from https://dhis2.health.gov.mw/. The kasungu_facility_catchments_2004.shp shapefile also contains the population and health information within each health-facility catchment area in Kasungu district. The aggregated population raster layers for Malawi e.g.,ku_pop_2017_1km_aggregated.tif were downloaded from the Open Spatial and Demographic and Data Research website: https://www.worldpop.org/geodata/country?iso3=MWI. These layers estimate total number of people per grid-cell. The units are number of people per pixel with country totals adjusted to match the corresponding official United Nations population estimates. The datasets were downloaded in Geotiff at a resolution of 1km and are projected in Geographic Coordinate System, WGS84. The kasungu_water.shpand water_bodies layers contain open and vegetated waterbodies polygons, detected using the Tropical Wetland Unmixing Tool (TropWet). TropWet is a Google Earth Engine hosted toolbox that uses the Landsat archive to map tropical wetlands and can be accessed through: https://www.aber.ac.uk/en/dges/research/earth-observation-laboratory/research/tropwet/

# 2017, 2018 and 2019 dry season malaria cases 
dry_season_malaria_2015_2019 <- read.csv(here::here("data", "dry_season_malaria 2015-2019.csv"))

# Explore malaria data
# dry_season_malaria_2015_2019 %>% dplyr::glimpse() %>% 
#   DataExplorer::introduce() %>% 
#   DataExplorer::plot_missing()

# Export 2017, 2018 and 2019 dry season malaria cases
write.csv(dry_season_malaria_2015_2019, file = "data/dry_season_malaria_2017_2019.csv")

write.table(dry_season_malaria_2015_2019, file = "dry_season_malaria_2017_2019.txt", 
            sep = ",", quote = FALSE, row.names = FALSE)

# 2020 dry season malaria cases
ku_malaria_2020 <- read.csv(here::here("data", "dry_season_malaria_2020.csv"))

ku_malaria_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 9
## $ X              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ~
## $ Names          <chr> "Anchor Farm Health Centre", "Bua Health Centre (Kasung~
## $ May.2020       <int> 1172, 2293, 855, NA, 258, 806, 2468, 1327, 1190, 81, 80~
## $ June.2020      <int> 571, 1310, 24, 50, 126, 551, 1756, 66, 1102, 26, 511, N~
## $ July.2020      <int> 438, 634, 143, 37, 242, 314, 1691, 799, 667, 14, 407, N~
## $ August.2020    <int> 184, 482, 122, 137, 259, 146, 1177, 640, 375, 6, 372, N~
## $ September.2020 <int> 214, 565, 115, 117, 225, 202, 1240, 626, 270, 8, 413, N~
## $ October.2020   <int> 258, 910, 231, 201, 276, 392, 1296, 738, 353, 12, 494, ~
## $ dr_2020        <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 14~
# Merge 2015 to 2019 dry season malaria case data with 2020 data
dry_season_malaria_2017_2020 <- cbind.data.frame(dry_season_malaria_2015_2019, ku_malaria_2020) 
 
dry_season_malaria_2017_2020 <- dry_season_malaria_2017_2020[,c("FID", "Names", "dr_2017", 
                                                                "dr_2018", "dr_2019", "dr_2020",
                                                                "LONGITU", "LATITUD")]

# Export 'dry season malaria 2017-2020' as csv
write.csv(dry_season_malaria_2017_2020, file = "data/dry_season_malaria_2017_2019.csv")

dry_season_malaria_2017_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 8
## $ FID     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names   <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
# Kasungu district boundary shapefile 
kasungu_district <- st_read(here::here("data", "kasungu_district.shp"))
## Reading layer `kasungu_district' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_district.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 5 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 491272.7 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
# Kasungu health facility catchment boundary shapefiles
malire <- sf::st_read(here::here("data", "kasungu_health_facility_catchments.shp")) # from 20ish years ago
## Reading layer `kasungu_health_facility_catchments' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_health_facility_catchments.shp' using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 21 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 491272.8 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
malire_new <- sf::st_read(here::here("data", "Kasungu_new_health_fac_catchment_clip.shp")) # generated from accessibility mapping
## Reading layer `Kasungu_new_health_fac_catchment_clip' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\Kasungu_new_health_fac_catchment_clip.shp' using driver `ESRI Shapefile'
## Simple feature collection with 29 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 491878.5 ymin: 8494645 xmax: 609044.2 ymax: 8631908
## Projected CRS: WGS 84 / UTM zone 36S
# malire_new <- spTransform(malire_new, proj4string(malire_new))

# Kasungu population raster layer
kasungu_population_2017 <- raster(here::here("data", "ku_pop_2017_1km_aggregated.tif"))
kasungu_population_2018 <- raster(here::here("data", "ku_pop_2018_1km_aggregated.tif"))
kasungu_population_2019 <- raster(here::here("data", "ku_pop_2019_1km_aggregated.tif"))
kasungu_population_2020 <- raster(here::here("data", "ku_pop_2020_1km_aggregated.tif"))

# Open waterbodies polygons 
water_2017 <- st_read(here::here("data", "water_bodies_2017.shp"))
## Reading layer `water_bodies_2017' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2017.shp' using driver `ESRI Shapefile'
## Simple feature collection with 168 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 514497 ymin: 8495941 xmax: 603149.8 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
water_2018 <- st_read(here::here("data", "kasungu_2018_water.shp"))
## Reading layer `kasungu_2018_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2018_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1105 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 496807.6 ymin: 8494693 xmax: 607913.8 ymax: 8607747
## Projected CRS: WGS 84 / UTM zone 36S
water_2019 <- st_read(here::here("data", "kasungu_2019_water.shp"))
## Reading layer `kasungu_2019_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2019_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1941 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 494197.2 ymin: 8494693 xmax: 607913.8 ymax: 8617573
## Projected CRS: WGS 84 / UTM zone 36S
water_2020 <- st_read(here::here("data", "water_bodies_2020.shp"))
## Reading layer `water_bodies_2020' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2020.shp' using driver `ESRI Shapefile'
## Simple feature collection with 266 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 508985.6 ymin: 8495793 xmax: 585761.1 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
# Add a field ID to water bodies polygons 
water_2017$ID <- 1:nrow(water_2017)
water_2018$ID <- 1:nrow(water_2018)
water_2019$ID <- 1:nrow(water_2019)
water_2020$ID <- 1:nrow(water_2020)

View the dry season malaria case data

We observe that Kasungu district has 30 health facilities classified as dispensary, health centre, district hospital and rural hospital and the highest malaria cases were recorded at Kasungu District Hospital.

dry_season_malaria_2017_2020 %>% 
  glimpse() %>% 
  summary()
## Rows: 36
## Columns: 8
## $ FID     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names   <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
##       FID           Names              dr_2017         dr_2018     
##  Min.   : 1.00   Length:36          Min.   :  427   Min.   :  749  
##  1st Qu.: 9.75   Class :character   1st Qu.: 2196   1st Qu.: 2424  
##  Median :18.50   Mode  :character   Median : 3132   Median : 3357  
##  Mean   :18.50                      Mean   : 3586   Mean   : 3909  
##  3rd Qu.:27.25                      3rd Qu.: 3993   3rd Qu.: 4684  
##  Max.   :36.00                      Max.   :16289   Max.   :15821  
##                                     NA's   :6       NA's   :6      
##     dr_2019         dr_2020         LONGITU         LATITUD      
##  Min.   :  533   Min.   :    0   Min.   :33.18   Min.   :-13.57  
##  1st Qu.: 1748   1st Qu.: 2286   1st Qu.:33.38   1st Qu.:-13.25  
##  Median : 2556   Median : 3824   Median :33.50   Median :-12.98  
##  Mean   : 2880   Mean   : 4657   Mean   :33.52   Mean   :-12.99  
##  3rd Qu.: 3284   3rd Qu.: 6212   3rd Qu.:33.68   3rd Qu.:-12.79  
##  Max.   :10721   Max.   :24424   Max.   :33.87   Max.   :-12.42  
##  NA's   :6                       NA's   :6       NA's   :6
dry_season_malaria_2017_2020 %>%  
  plotly::plot_ly(y = ~Names,
                  x = ~dr_2017,
                  type = "bar",
                  orientation = 'h',
                  name = "2017") %>%
  plotly::add_trace(x = ~ dr_2018,
                    name = "2018") %>%
  plotly::add_trace(x = ~ dr_2019,
                    name = "2019") %>% 
  plotly::add_trace(x = ~ dr_2020,
                    name = "2020") %>% 
  plotly::layout(#barmode = "stack",
                 xaxis = list(title = "Total malaria cases"),
                 yaxis = list(title = ""),
                 hovermode = "compare",
                 margin = list(b = 10,
                               t = 10,
                               pad = 2))

Fig.1 The total malaria cases recorded at each health-care facility in Kasungu district

View location of Kasungu health-care facilities within health facility catchment area

Heath facility catchment area is the area from which a health facility attracts patients. Data provider is National Statistical Office.

# Using the is.na() function to remove health centres with missing longitude and latitude coordinates
# Aggregating Kasalika Health Centre and Kasungu District Hospital, and 
# Kaluluma Rural Hospital and Nkhamenya Rural Hospital malaria cases

# health_facility_aggregated <- dry_season_malaria_2017_2020[!is.na(dry_season_malaria_2017_2020$LONGITU),] %>% 
#   dplyr::filter(Names != "Kasalika Health Centre",
#                 Names != "Bua Health Centre",
#                 Names != "Kaluluma Rural Hospital") %>% 
#   dplyr::mutate(dr_2017 = ifelse(Names == "Kasungu District Hospital", 4528 + 16289, dr_2017), 
#                 dr_2018 = ifelse(Names == "Kasungu District Hospital", 4493 + 15821, dr_2018), 
#                 dr_2019 = ifelse(Names == "Kasungu District Hospital", 2729 + 10721, dr_2018), 
#                 dr_2020 = ifelse(Names == "Kasungu District Hospital", 4368 + 24424, dr_2020),
#                 dr_2017 = ifelse(Names == "Nkhamenya Rural Hospital", 2887 + 752, dr_2017), 
#                 dr_2018 = ifelse(Names == "Nkhamenya Rural Hospital", 851 + 3689, dr_2018), 
#                 dr_2019 = ifelse(Names == "Nkhamenya Rural Hospital", 533 + 4004, dr_2019),
#                 dr_2020 = ifelse(Names == "Nkhamenya Rural Hospital", 3587 + 5929, dr_2020),
#                 dr_2017 = ifelse(Names == "Mziza Health Centre", 3863 + 3489, dr_2017),
#                 dr_2018 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2018),
#                 dr_2019 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2019),
#                 dr_2020 = ifelse(Names == "Mziza Health Centre", 2397 + 6194, dr_2020)) 

zipatala_aggregated <- dry_season_malaria_2017_2020[complete.cases(dry_season_malaria_2017_2020),] %>% 
  dplyr::filter(Names != "Kasalika Health Centre",
                Names != "Bua Health Centre",
                Names != "Kaluluma Rural Hospital") 

zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4528 + 16289

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4493 +15821

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 2729 + 10721

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4368 + 24424
  
zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 2887 + 752

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 851 + 3689

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 533 + 4004

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 3587 + 5929

zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 3863 + 3489

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2815 + 1804

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2439 + 1740

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 6194 + 2397

# There are NA values in dry_season_malaria_2017_2020.csv, replace them by 0 and add a column to keep information
# dry_season_malaria_2017_2020$is_na = ifelse(is.na(dry_season_malaria_2017_2020$LONGITU), TRUE, FALSE)
# index = dry_season_malaria_2017_2020$is_na == TRUE
# dry_season_malaria_2017_2020[index, "LONGITU"] <- 0
# dry_season_malaria_2017_2020[index, "LATITUD"] <- 0
# 
# dry_season_malaria_2017_2020 <- SpatialPointsDataFrame(coords = dry_season_malaria_2017_2020[, 7:8],
#                                    data = dry_season_malaria_2017_2020[,1:6])
# 
health_facility_aggr_sf <- sf::st_as_sf(zipatala_aggregated,
                                        coords = c("LONGITU", "LATITUD"),
                                        crs = 4326, agr = "constant")
# Save as shapefile
# sf::st_write(health_facility_sf, "data/kasungu_zipatala_aggregated.shp", overwrite = TRUE)

# mapview::mapview(health_facility_aggr_sf)

# Set to interactive view
# tmap_mode('view')

old_catchment <- tm_shape(malire)+
  tm_polygons()+
  tm_shape(health_facility_aggr_sf)+
  tm_dots(size = .3, col = "blue", alpha = 0.5)+
  tm_text("Names", size = .3, just = "top", 
          col = "black", remove.overlap = TRUE)+
  tm_layout(frame = FALSE,
            title = "Old catchment boundaries",
            title.size = .8, 
            title.position = c("left", "top"))+
  tm_scale_bar(breaks = c(0, 10, 20), 
               text.size = .5)

new_catchment <- tm_shape(malire_new)+
  tm_polygons()+
  tm_shape(health_facility_aggr_sf)+
  tm_dots(size = .3, col = "blue", alpha = 0.5)+
  tm_text("Names", size = .3, just = "top", 
          col = "black", remove.overlap = TRUE)+
  tm_layout(frame = FALSE,
            title = "New catchment boundaries",
            title.size = .8, 
            title.position = c("left", "top"))+
  tm_compass(position=c("right", "top"))

tmap::tmap_arrange(old_catchment, new_catchment)
Fig 2. Kasungu Health-care Facilities and Catchment Areas

Fig 2. Kasungu Health-care Facilities and Catchment Areas

View population and malaria rate for Malawi by health facility catchment area

Using population and health information within each health facility catchment area we produce a choropleth map colored in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic, in this case total population, population density, malaria rate and total malaria cases.

# Take a look at the variables, CRS and geometry type
# head(malire)

# Function to create a choropleth map from sf object
choroplethmap <- function(df, vname = NA, pal = NA, legend.title = NA){
  # choropleth map
  # df: simple feature polygon layer
  # vname: variable name (as character, in quotes)
  # pal: color palette
  # legend.title: legend title in quotes
  # returns:
  # a tmap element (plots a map)
  tm_shape(df)+
    tm_fill(col = vname, style = "quantile",
            palette = pal, n = 5, title = legend.title)+
    tm_borders()+
    tm_layout(legend.position = c("right","bottom"))
}


population <- choroplethmap(malire, vname = "POPULATION", 
                            pal = "Reds", legend.title = "Total population")

pop_density <- choroplethmap(malire, vname = "DENSITY", 
                             pal = "YlOrRd", legend.title = "Population density")

malaria_rate <- choroplethmap(malire, vname = "MALAR_RATE", 
                              pal = "BuGn", legend.title = "Malaria prevalence %")

malaria_cases <- choroplethmap(malire, vname = "MALARIA_CA", 
                               pal = "Purples", legend.title = "Malaria cases")

tmap_arrange(population, pop_density, malaria_rate, malaria_cases)
Fig.3 Population and health information for each health facility catchment area

Fig.3 Population and health information for each health facility catchment area

View raster population metadata and estimated population per grid-cell

CRS for kasungu_population layers is already in WGS 84 UTM Zone 36 South, which is the base projected coordinate system for Malawi and has units in meters, hence no need to transform it.The highest estimated population per grid-cell is 7,949 people in 2020.

# Check out the CRS and values of the population layers
kasungu_population_2017
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2017_1km_aggregated.tif 
## names      : ku_pop_2017_1km_aggregated
kasungu_population_2018
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2018_1km_aggregated.tif 
## names      : ku_pop_2018_1km_aggregated 
## values     : 0, 6253.557  (min, max)
kasungu_population_2019
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2019_1km_aggregated.tif 
## names      : ku_pop_2019_1km_aggregated 
## values     : 0, 6483.727  (min, max)
kasungu_population_2020
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2020_1km_aggregated.tif 
## names      : ku_pop_2020_1km_aggregated 
## values     : 0, 7949.033  (min, max)
# Function to create a raster population map
populationmap <- function(df, title){
  # population map
  # arguments:
  #   df:  aggregated population raster layer
  #   legend.title: legend title
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_raster(palette = "Reds", title = title,
              breaks = c(0,100,200,400,600,800,1000,2000,4000,6000,8000))+
    tm_layout(legend.position = c("right", "bottom"),
              frame = FALSE)+
    tm_scale_bar(position = c("left", "bottom"))
}
# Set to static map
tmap_mode("plot")

pop_2017 <- populationmap(kasungu_population_2017, title = "2017 Population")

pop_2018 <- populationmap(kasungu_population_2018, title = "2018 Population")

pop_2019 <- populationmap(kasungu_population_2019, title = "2019 Population")

pop_2020 <- populationmap(kasungu_population_2020, title = "2020 Population")

tmap_arrange(pop_2017, pop_2018, pop_2019, pop_2020, nrow = 2) # Layout the maps
Fig.4 Estimated total number of people per 1km grid-cell

Fig.4 Estimated total number of people per 1km grid-cell

Buffer, combine and transform TropWet derived waterbody polygons

Buffers of 30m radius have been created around open waterbodies and the geometries of overlapping polygons are unioned together. st_union returns a single geometry sfc object, which is why st_cast and st_as_sf functions have been used to cast and convert the multipolygon buffer geometries to a dissolved or split polygon geometry collection.

surfaceWater_2017 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2017, dist = 30)), "POLYGON"))

surfaceWater_2018 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2018, dist = 30)), "POLYGON"))

surfaceWater_2019 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2019, dist = 30)), "POLYGON"))

surfaceWater_2020 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2020, dist = 30)), "POLYGON"))

Create a function for computing buffers around open waterbody polygons

waterbody_buffer <- function(waterbody, distance, catchment){
  
  #Buffer the 'water' vector file by 'distance' meters
  buffer_radiusk <- st_buffer(waterbody, distance)
  
  # Dissolve the buffers
  buffer_radiusk_union <- st_as_sf(st_cast(st_union(buffer_radiusk),"MULTIPOLYGON"))
  
  # Assign Attributes of the 'catchment' to each of the waterbodies. 
  int_radiusk <- st_intersection(buffer_radiusk_union, catchment)
  open_water_buffer <- st_as_sf(int_radiusk)
  
  # Polygons being seen to be in multiple catchments
  st_intersects(open_water_buffer, catchment)
  
  # Make the assumption that the attribute is constant throughout the geometry
  st_agr(open_water_buffer) = "constant"
  st_agr(catchment) = "constant"
  
  return(out = open_water_buffer)
}

Create buffers using Kasungu open waterbodies and health-facility catchment boundary layers

st_buffer has been used to compute 1km, 2km and 3km buffers around each waterbody polygon. Then geometry of the buffer features are then combined resulting in resolved internal boundaries. Invalid waterbody polygons can be checked by using st_is_valid which returns by default NA on corrupt geometries.

# For 2017 TropWet surface water polygons
buffer_1km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 1000, catchment = malire_new)

buffer_2km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 2000, catchment = malire_new)

buffer_3km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 3000, catchment = malire_new)

# For 2018 TropWet surface water polygons
buffer_1km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 1000, catchment = malire)

buffer_2km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 2000, catchment = malire)

buffer_3km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 3000, catchment = malire)

# For 2019 TropWet surface water polygons
buffer_1km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 1000, catchment = malire)

buffer_2km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 2000, catchment = malire)

buffer_3km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 3000, catchment = malire)

# For 2020 TropWet surface water polygons
buffer_1km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 1000, catchment = malire)

buffer_2km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 2000, catchment = malire)

buffer_3km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 3000, catchment = malire)

View the intersected buffers

# Map the buffers
buffermap <- function(df, boundary, title = NA){
  # function for creating buffer map in ggplot
  # arguments:
  #   df:  buffer polygon layer
  #   boundary: Kasungu district boundary layer
  #   title: main title
  # returns:
  #   a map-element (plots a map)
  ggplot(data = df)+
     geom_sf()+
     geom_sf(data = boundary, fill = NA)+
     theme_classic()+
     labs(title = title)
 }

# For 2017 
buffer1km_2017 <- buffermap(buffer_1km_2017, kasungu_district, title = "2017: 1km Buffers")
buffer2km_2017 <- buffermap(buffer_2km_2017, kasungu_district, title = "2017: 2km Buffers")
buffer3km_2017 <- buffermap(buffer_3km_2017, kasungu_district, title = "2017: 3km Buffers")

# For 2018
buffer1km_2018 <- buffermap(buffer_1km_2018, kasungu_district, title = "2018: 1km Buffers")
buffer2km_2018 <- buffermap(buffer_2km_2018, kasungu_district, title = "2018: 2km Buffers")
buffer3km_2018 <- buffermap(buffer_3km_2018, kasungu_district, title = "2018: 3km Buffers")

# For 2019
buffer1km_2019 <- buffermap(buffer_1km_2019, kasungu_district, title = "2019: 1km Buffers")
buffer2km_2019 <- buffermap(buffer_2km_2019, kasungu_district, title = "2019: 2km Buffers")
buffer3km_2019 <- buffermap(buffer_3km_2019, kasungu_district, title = "2019: 3km Buffers")

# For 2020
buffer1km_2020 <- buffermap(buffer_1km_2020, kasungu_district, title = "2020: 1km Buffers")
buffer2km_2020 <- buffermap(buffer_2km_2020, kasungu_district, title = "2020: 2km Buffers")
buffer3km_2020 <- buffermap(buffer_3km_2020, kasungu_district, title = "2020: 3km Buffers")
 
grid.arrange(buffer1km_2017, buffer1km_2018, buffer1km_2019, buffer1km_2020,
             buffer2km_2017, buffer2km_2018, buffer2km_2019, buffer2km_2020, 
             buffer3km_2017, buffer3km_2018, buffer3km_2019, buffer3km_2020, ncol = 4)
Fig 5. Buffers around open waterbodies in Kasungu

Fig 5. Buffers around open waterbodies in Kasungu

Estimating population living in various distances from open water bodies and in each health facility catchment

Here, we create a function that uses open water bodies, health facility catchment boundary and population datasets to estimate the number of people living within 1km, 2km and 3km buffers surrounding the waterbodies. This involves overlaping and intersecting different data layers (buffers of waterbodies, catchment boundary, population raster, etc), so that we can apportion population from one layer to another. In this model, the layer with population estimate is kasungu_population_* and the target layer that does not have an estimate, but for which we desire one, is kasungu_health_facility_catchment/malire. The function returns an object called finalized that has estimated population within the buffers zones, and multipolygon geometry.

nachulu <- function(water, distance, catchment, raster_population){
  # function to estimate population out of raster and vector layers
  # arguments:
  #    water: waterbody polygon layer
  #    distance: buffer distance in meters
  #    catchment: health facility catchment boundary layer generated from accessibility mapping
  #    raster_population: aggregated population raster layer
  # returns:
  #    finalized: sf objects with a data frame containing estimated population
  
  
  #Buffer the 'water' vector file by 'distance' meters
  buffer_radiusk <- st_buffer(water, distance)
  
  # Dissolve the buffers. Unioning geometries dissolves for instance internal polygon 
  # boundaries, which otherwise would lead to invalid MULTIPOLYGON errors in subsequent analysis.
  buffer_radiusk_union <- sf::st_as_sf(st_cast(st_union(buffer_radiusk),"POLYGON"))
  
  # Split the buffered water file by the boundaries of the catchment area. 
  # We don't want to allocate the attributes in this step 
  int_radiusk <- st_intersection(buffer_radiusk_union, st_geometry(catchment))
  water_int_radiusk <- sf::st_as_sf(int_radiusk)
  
  # Convert the MULTIPOLYGON object into several POLYGON objects
  water_int_radiusk <- st_cast(st_buffer(water_int_radiusk,0.0), "MULTIPOLYGON") %>% 
    st_cast("POLYGON")
  
  # Polygons being seen to be in multiple catchments
  st_intersects(water_int_radiusk, catchment)
  
  # Estimation of population within X kilometer buffer.
  # Extracting zonal statistics from a population raster layer. 
  # The population raster is a continuous gridded surface layer that assign an 
  # estimated population density value to every square in their grid. 
  # The population statistics are then summed and apportioned to the buffer polygons
  water_int_radiusk$pop_est<- raster::extract(raster_population, water_int_radiusk, 
                                              fun = sum, na.rm = TRUE)
  
  # Make the assumption that the attribute is constant throughout the geometry
  st_agr(water_int_radiusk) = "constant"
  st_agr(catchment) = "constant"
  
  # Find which catchment each polygon belongs to using its centroid - a point dataset 
  # representing the geographic center-points of the polygons 
  assign_catchment <- st_intersection(st_centroid(water_int_radiusk), catchment)
  
  
  # Calculated total population living X distance for each facility  
  # Notice that the assign_catchment is comprised of separate POLYGONS (assign_catchment$x). 
  # The first step is to “dissolve” away these POLYGONS into one MULTIPOLYGON. 
  # There is no sf equivalent to the ArcMap “dissolve” operation. 
  # Instead we use a combination of group_by and summarize from the dplyr package. 
  # Stats::aggregate from sf package, and dplyr::summarize both do essentially the same.
  npeople <- assign_catchment %>% dplyr::group_by(DN) %>%
    summarize(pop_estimate = sum(pop_est, na.rm = TRUE))
  
  finalized <- merge(catchment, st_drop_geometry(npeople), by='DN', all.x = TRUE)
  return(out=finalized)
}

Run the model to estimate population living around open waterbodies

Using the nachulu function, here we estimate the number of people surrounding waterbodies in each health facility catchment area using attributes from the open waterbody buffers, health facility catchment boundary and aggregated population raster layers. That is, population living in various distances from open water bodies e.g. 1km, 2km and 3 km is estimated and assigned to health facilities.

# For 2017 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2017<- nachulu(water = surfaceWater_2017, distance = 1000, 
                     catchment = malire_new,
                     raster_population = kasungu_population_2017)

run1k_2017$pop_1km <- run1k_2017$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2017<- nachulu(water = surfaceWater_2017, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2017)

run2k_2017$pop_2km <- run2k_2017$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2017<- nachulu(water = surfaceWater_2017, distance = 3000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2017)

run3k_2017$pop_3km <- run3k_2017$pop_estimate


# For 2018 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2018<- nachulu(water = surfaceWater_2018, distance = 1000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2018)

run1k_2018$pop_1km <- run1k_2018$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2018<- nachulu(water = surfaceWater_2018, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2018)

run2k_2018$pop_2km <- run2k_2018$pop_estimate

# Estimate population living within 3km radius from water bodies
 run3k_2018 <- nachulu(water = surfaceWater_2018, distance = 3000, 
                       catchment = malire_new, 
                       raster_population = kasungu_population_2018)
 
 run3k_2018$pop_3km <- run3k_2018$pop_estimate


# For 2019 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2019 <- nachulu(water = surfaceWater_2019, distance = 1000, 
                      catchment = malire_new, 
                      raster_population = kasungu_population_2019)

run1k_2019$pop_1km <- run1k_2019$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2019<- nachulu(water = surfaceWater_2019, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2019)

run2k_2019$pop_2km <- run2k_2019$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2019<- nachulu(water = surfaceWater_2019, distance = 3000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2019)

run3k_2019$pop_3km <- run3k_2019$pop_estimate


# For 2020 ---------------------------------------------------------------------
#  Estimate population living within 1km radius from water bodies
run1k_2020 <- nachulu(water = surfaceWater_2020, distance = 1000, 
                      catchment = malire_new, 
                      raster_population = kasungu_population_2020)

run1k_2020$pop_1km <- run1k_2020$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2020 <- nachulu(water = surfaceWater_2020, distance = 2000, 
                      catchment = malire_new, 
                     raster_population = kasungu_population_2020)

run2k_2020$pop_2km <- run2k_2020$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2020 <- nachulu(water = surfaceWater_2020, distance = 3000, catchment = malire_new, 
                     raster_population = kasungu_population_2020)

run3k_2020$pop_3km <- run3k_2020$pop_estimate

View the estimated population

Map the outputs from the nachulu function: layers of polygons representing health facility catchment areas, with a field in the attribute table containing the estimated catchment population pop_estimate in 2017, 2018, 2019 and 2020. In areas where the input data is out of data, e.g, no presence of waterbody polygons, the estimated population is missing.

# Function to create maps of estimated population out of sf objects from the nachulu function
estimatedpop <- function(df, vname = "pop_estimate", title, legend.title = NA){
  # estimated population map
  # df: estimated population layer from nachulu function
  # vname: variable name (as character, in qoutes)
  # title: map title in quotes
  # legend.title: legend title in qoutes
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_fill(col = vname, style = "quantile", 
            palette = "Reds", n = 5, title = legend.title)+
    tm_borders()+
    tm_layout(legend.position = c(0.7,"bottom"),
              title.size = .3,
              frame = FALSE)+
    tm_credits(title, position = c(0,0.75), size = 1)
}

run1k_2017_map <- estimatedpop(run1k_2017, title = "2017: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2017_map <- estimatedpop(run2k_2017, title = "2017: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2017_map <- estimatedpop(run3k_2017, title = "2017: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2018_map <- estimatedpop(run1k_2018, title = "2018: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2018_map <- estimatedpop(run2k_2018, title = "2018: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2018_map <- estimatedpop(run3k_2018, title = "2018: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2019_map <- estimatedpop(run1k_2019, title = "2019: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2019_map <- estimatedpop(run2k_2019, title = "2019: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2019_map <- estimatedpop(run3k_2019, title = "2019: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2020_map <- estimatedpop(run1k_2020, title = "2020: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2020_map <- estimatedpop(run2k_2020, title = "2020: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2020_map <- estimatedpop(run3k_2020, title = "2020: 3km buffers", 
                               legend.title = "Estimated \n population")

tmap_arrange(run1k_2017_map, run2k_2017_map, run3k_2017_map, 
             run1k_2018_map, run2k_2018_map, run3k_2018_map,
             run1k_2019_map, run2k_2019_map, run3k_2019_map,
             run1k_2020_map, run2k_2020_map, run3k_2020_map, ncol = 3)
Fig 6. Estimated population in Kasungu health facility catchments

Fig 6. Estimated population in Kasungu health facility catchments

Merge estimated population within the various buffers into a single data frame

First, we need to convert the sf outputs from the nachulu function to a plain data frame with as.data.frame, which drops the geometry and gives back a plain data frame

# Convert sf objects to plain data frame
# 2017 -------------------------------------------------------------------------
run1k_2017_df <- as.data.frame(run1k_2017)[,c("DN", "pop_1km")]
run2k_2017_df <- as.data.frame(run2k_2017)[,c("DN", "pop_2km")]
run3k_2017_df <- as.data.frame(run3k_2017)[,c("DN", "pop_3km")]

# 2018 -------------------------------------------------------------------------
run1k_2018_df <- as.data.frame(run1k_2018)[,c("DN", "pop_1km")]
run2k_2018_df <- as.data.frame(run2k_2018)[,c("DN", "pop_2km")]
run3k_2018_df <- as.data.frame(run3k_2018)[,c("DN", "pop_3km")]

# 2019 -------------------------------------------------------------------------
run1k_2019_df <- as.data.frame(run1k_2019)[,c("DN", "pop_1km")]
run2k_2019_df <- as.data.frame(run2k_2019)[,c("DN", "pop_2km")]
run3k_2019_df <- as.data.frame(run3k_2019)[,c("DN", "pop_3km")]

# 2020 -------------------------------------------------------------------------
run1k_2020_df <- as.data.frame(run1k_2020)[,c("DN", "pop_1km")]
run2k_2020_df <- as.data.frame(run2k_2020)[,c("DN", "pop_2km")]
run3k_2020_df <- as.data.frame(run3k_2020)[,c("DN", "pop_3km")]

# Merge the data frames. NB: one important variable 'dry season malaria cases' 
# is yet to be added to the data frames
finalized_2017_df <- cbind.data.frame(run1k_2017_df, run2k_2017_df, run3k_2017_df)

finalized_2018_df <- cbind.data.frame(run1k_2018_df, run2k_2018_df, run3k_2018_df)

finalized_2019_df <- cbind.data.frame(run1k_2019_df, run2k_2019_df, run3k_2019_df)

finalized_2020_df <- cbind.data.frame(run1k_2020_df, run2k_2020_df, run3k_2020_df)


# Exclude unnecessary columns and rows
# 2017 -------------------------------------------------------------------------
finalized_2017_df <- finalized_2017_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2017_df <- cbind("Fid" = rownames(finalized_2017_df), finalized_2017_df)

# 2018 -------------------------------------------------------------------------
finalized_2018_df <- finalized_2018_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]

finalized_2018_df <- cbind("Fid" = rownames(finalized_2018_df), finalized_2018_df)

# 2019 -------------------------------------------------------------------------
finalized_2019_df <- finalized_2019_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2019_df <- cbind("Fid" = rownames(finalized_2019_df), finalized_2019_df)

# 2020 -------------------------------------------------------------------------
finalized_2020_df <- finalized_2020_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2020_df <- cbind("Fid" = rownames(finalized_2020_df), finalized_2020_df) 

Tidy and merge the estimated population data with the malaria data

The estimated population within 1km, 2km and 3km buffers around water bodies is now being merged with the 2017 to 2020 dry season malaria dataframe using a user-defined function: organize

# Create a function that merges and tidy the population and malaria dataframes 
organize <- function(malaria_data, pop_data){
  
  # function to merge, tidy dry season malaria and estimated population datasets
  # arguments:
  #   malaria_data: sf point object with a data frame containing aggregated 
  #                 malaria data at health facilities level
  #   pop_data: sf polygon object with a data frame containing the estimated population
  # returns:
  #   hfc_malaria_pop: sf objects with a data frame containing dry season malaria and estimated population


  # Convert sf objects to spatial
  estimated_pop_shp <- as(pop_data, "Spatial")
  malaria_shp <- as(malaria_data, "Spatial")

  # Match CRS
  malaria_shp <- spTransform(malaria_shp, crs(estimated_pop_shp))

  # Overlay aggregated health facility points and extract estimated population
  # Using 'point.in.poly' to return a point spatial object, in this case location of health facilities
  # and estimated population instead of sp::over function, which simply returns 
  # a data frame, with the same no. rows.
  # Argument 'sp = TRUE' returns an sp class object, else returns sf class object
  health_facilities_pop <- spatialEco::point.in.poly(malaria_shp, estimated_pop_shp, sp = TRUE) 
 
  # head(health_facilities_pop@data)

  # Add the extracted ID, health facility names, dry season malaria cases and estimated population to 
  # the health facility catchments (hfc)
  hfc_pop_malaria_shp <- merge(estimated_pop_shp, health_facilities_pop, 
                               by.x = "FID", by.y = "DN")

  # Convert the merged shapefile containing estimated population and malaria data to sf-object
  hfc_pop_malaria_sf <- sf::st_as_sf(hfc_pop_malaria_shp)

  # Tidy the data by reordering columns and dropping those not needed
  # First, get column names
  # colnames(hfc_pop_malaria_sf)
  # [1] "DN"           "pop_1km"      "pop_estimate" "FID"          "Names"        "dr_2017"     
  # [7] "dr_2018"      "dr_2019"      "dr_2020"      "coords.x1"    "coords.x2"    "geometry"

  hfc_pop_malaria_sf_trim <- hfc_pop_malaria_sf %>% 
    dplyr::select(-c(pop_estimate, coords.x1, coords.x2))
 
  # Reorder columns by position
  hfc_malaria_pop_sf <- hfc_pop_malaria_sf_trim[, c(4, 3, 1, 2, 5, 6, 7, 8, 9)]

  return(out = hfc_malaria_pop_sf)

}

# Invoking the function 
# 2017 data -------------------------------------------------------------------------------
hfc_1km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2017)
hfc_2km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2017)
hfc_3km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2017)

# 2018 data -------------------------------------------------------------------------------
hfc_1km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2018)
hfc_2km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2018)
hfc_3km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2018)

# 2019 data -------------------------------------------------------------------------------
hfc_1km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2019) 
hfc_2km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2019)
hfc_3km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2019)

# 20 20 data ------------------------------------------------------------------------------
hfc_1km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2020)
hfc_2km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2020)
hfc_3km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2020)

# Check if merging and tidying worked
# Create a user-defined function that maps the merged sf objects
malaria_map <- function(df, year = NA, title, legend.title = NA){
  # dry season malaria cases map
  # df: sf objects from 'organize' function
  # vname: variable name (as character, in qoutes)
  # title: map title in quotes
  # legend.title: legend title in qoutes
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_fill(col = year, palette = "Oranges", n = 5, title = legend.title,
            breaks = c(400, 4000, 10000, 16000, 20000, 30000 ))+
    tm_borders("burlywood")+
    tm_layout(legend.position = c(0.6, "bottom"),
              legend.text.size = 0.6,
              legend.title.size = 0.8,
              title.size = .3,
              frame = FALSE)+
     tm_credits(title, position = c(0.2, 0.8), size = .9)+
    tm_view(view.legend.position = c("right", "bottom"))
}

dry_season_malaria_2017 <- malaria_map(hfc_1km_2017_sf, year = "dr_2017", title = "2017", 
                                       legend.title = "2017 malaria cases")

dry_season_malaria_2018 <- malaria_map(hfc_1km_2018_sf, year = "dr_2018", title = "2018",
                                       legend.title = "2018 malaria cases")

dry_season_malaria_2019 <- malaria_map(hfc_1km_2019_sf, year = "dr_2019", title = "2019",
                                       legend.title = "2019 malaria cases")

dry_season_malaria_2020 <- malaria_map(hfc_1km_2020_sf, year = "dr_2020", title = "2020",
                                       legend.title = "2020 malaria cases")

tmap::tmap_arrange(dry_season_malaria_2017, dry_season_malaria_2018,
                   dry_season_malaria_2019, dry_season_malaria_2020, ncol = 2)
Fig. 7: Dry season malaria cases, Kasungu

Fig. 7: Dry season malaria cases, Kasungu

Box plot of Kasungu dry season malaria cases from 2017 to 2020

dry_season_malaria <- as(hfc_1km_2017_sf, "Spatial")

ggplot2::ggplot(reshape2::melt(dry_season_malaria@data[, c("dr_2017", "dr_2018", "dr_2019", "dr_2020")]), 
  aes(variable, value)) +
  geom_boxplot() +
  xlab("Years") +
  ylab("Malaria cases") +
  theme_classic() 
Fig. 8: Dry season malaria cases from 2017 - 2020 in Kasungu

Fig. 8: Dry season malaria cases from 2017 - 2020 in Kasungu

Transfrom and combine dataframe

# 2017 dataframes --------------------------------------------------------------
hfc_1km_2017_df <- hfc_1km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(Names, FID, pop_1km_2017 = `pop_1km`,
                                       dr_2017, dr_2018, dr_2019, dr_2020)
 
hfc_2km_2017_df <- hfc_2km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID,pop_2km_2017 = `pop_2km`)

hfc_3km_2017_df <- hfc_3km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2017 = `pop_3km`)

# 2018 dataframes --------------------------------------------------------------
hfc_1km_2018_df <- hfc_1km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2018 = `pop_1km`)

hfc_2km_2018_df <- hfc_2km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2018 = `pop_2km`)

hfc_3km_2018_df <- hfc_3km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2018 = `pop_3km`)

# 2019 dataframes --------------------------------------------------------------
hfc_1km_2019_df <- hfc_1km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2019 = `pop_1km`)

hfc_2km_2019_df <- hfc_2km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2019 = `pop_2km`)

hfc_3km_2019_df <- hfc_3km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2019 = `pop_3km`)

# 2020 dataframes --------------------------------------------------------------
hfc_1km_2020_df <- hfc_1km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2020 = `pop_1km`)

hfc_2km_2020_df <- hfc_2km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2020 = `pop_2km`)

hfc_3km_2020_df <- hfc_3km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2020 = `pop_3km`)

# Bind the dataframes and create new columns containing malaria incidence rate for each year
# The malaria proportion (mProp) is the number of cases of malaria divided by the 
# total population (Pop).
kasungu_model_df <- cbind(hfc_1km_2017_df, hfc_2km_2017_df, hfc_3km_2017_df,
                          hfc_1km_2018_df, hfc_2km_2018_df, hfc_3km_2018_df,
                          hfc_1km_2019_df, hfc_2km_2019_df, hfc_3km_2019_df,
                          hfc_1km_2020_df, hfc_2km_2020_df, hfc_3km_2020_df) %>% 
  dplyr::select(-FID) %>%
  dplyr::rowwise() %>% 
  dplyr::mutate(mProp_1km2017 = round(dr_2017/pop_1km_2017, 1),
                mProp_2km2017 = round(dr_2017/pop_2km_2017, 1),
                mProp_3km2017 = round(dr_2017/pop_3km_2017, 1))

Save 2017, 2018, 2019 and 2020 merged data

After knitting this R markdown, run the Kasungu_model.R to fit a glm to the covariates: estimated population around water bodies and dry season malaria cases

write.csv(kasungu_model_df, file = "data/Kasungu_hfc_malaria_pop_2017_2020.csv")